knitr::opts_chunk$set(echo = TRUE)

library(JM)
library(GLMMadaptive)

source("V:/Users/051528(PM_Afonso)/GitHub/JMbayes2/R/jm.R")
source("V:/Users/051528(PM_Afonso)/GitHub/JMbayes2/R/jm_fit.R")
source("V:/Users/051528(PM_Afonso)/GitHub/JMbayes2/R/help_functions.R")
source("V:/Users/051528(PM_Afonso)/GitHub/JMbayes2/R/basic_methods.R")
source("V:/Users/051528(PM_Afonso)/GitHub/JMbayes2/R/create_Wlong_mats.R")

Functions

mapply2 <- function (FUN, ..., MoreArgs = NULL, USE.NAMES = TRUE) {
        mapply(FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE,
               USE.NAMES = USE.NAMES)
      }

create_HC_X2 <- function (x, z, id) {
    check_tv <- function (x, id) {
        !all(sapply(split(x, id),
                    function (z) all(z - z[1L] < .Machine$double.eps^0.5)))
    }

    cnams_x <- colnames(x)
    cnams_z <- colnames(z)
    if (!"(Intercept)" %in% cnams_x || !"(Intercept)" %in% cnams_z) {
        stop("cannot perform hierarchical centering in the absense of an ",
             "intercept term in both the fixed and random effects design ",
             "matrices.")
    }

    x_in_z <- which(cnams_x %in% cnams_z)
    x_notin_z <- which(!cnams_x %in% cnams_z)
    baseline <- x_notin_z[!apply(x[, x_notin_z, drop = FALSE], 2L, check_tv, id= id)]
    x_notin_z <- setdiff(x_notin_z, baseline)
    if (!length(baseline)) baseline <- as.integer(NA)
    if (!length(x_notin_z)) x_notin_z <- as.integer(NA)
    list(baseline = baseline, x_in_z = x_in_z, x_notin_z = x_notin_z,
         Xbase = x[!duplicated(id), baseline, drop = FALSE])
}

Proposals for create_X_dot function

create_X_dot <- function(Xbase, nT, unq_idL, nres, nfes_HC, baseline, x_in_z_base, x_in_z) {

    n_outcomes <- length(nres) # number of outcomes
    n_res <- sum(nres) # total number of RE

    rows <- split(seq_len(n_res), rep(seq_along(nres), nres)) # all rows (id= 1)

    base_rows <- sapply(rows, head, 1) # rows for baseline (id= 1)
    base_cols <- mapply(function(xzb, b){ which(xzb %in% b)}, x_in_z_base, baseline) # cols for baseline

    RE_rows <- sapply(x_in_z, seq_along) # rows for RE (id= 1)
    RE_cols <- x_in_z # cols for RE

    M <- matrix(0, nrow= n_res*nT, ncol= sum(nfes_HC))

    for (j in seq_len(n_outcomes)) {

        ids <- unq_idL[[j]] # ids present in outcome-j
        ids_rows <- (ids-1) * n_res # 1st row for each id

        M[base_rows[j] + ids_rows, sum(nfes_HC[1:j-1]) + base_cols[[j]]] <- Xbase[[j]] # add baseline

        rows <- sum(nres[1:j-1]) + RE_rows[[j]] + rep(ids_rows, each= length(RE_rows[[j]]))
        cols <- rep(sum(nfes_HC[1:j-1]) + RE_cols[[j]], times= length(ids))
        M[cbind(rows, cols)] <- 1 # add 1 for each RE present in the FE
    }
    M
}

create_X_dot2 <- function (nT, nres, ind_FE_HC, x_in_z, x_in_z_base, unq_idL,
                           Xbase) {
    n_outcomes <- length (nres)
    ind_rows_subject <- rep(seq_len(nT), each = sum(nres))
    ind_rows_outcome <- rep(seq_len(n_outcomes), nres)
    ind_cols <- split(seq_along(ind_FE_HC),
                      rep(seq_len(n_outcomes), sapply(x_in_z_base, length)))

    M <- matrix(0.0, sum(nT * nres), length(ind_FE_HC))
    for (i in seq_len(nT)) {
        for (j in seq_len(n_outcomes)) {
            check <- i %in% unq_idL[[j]]
            if (check) {
                rows <- which(ind_rows_subject == i)[ind_rows_outcome == j]
                cols <- ind_cols[[j]][x_in_z[[j]]]
                M[cbind(rows[1:length(cols)], cols)] <- 1
                if (length(ind_cols[[j]]) > length(cols)) {
                    M[rows[1L], ind_cols[[j]][-x_in_z[[j]]]] <-
                        Xbase[[j]][as.character(i), ]
                }
            }
        }
    }
    M
}

Example 1

Data

set.seed(2020)

nT <- 500 # number of subjects
n_i <- 15 # number of repeated measurements
tmax <- 10 # maximum follow-up time

data1 <- data.frame(id     = rep(seq_len(nT), each = n_i),
                    time1  = c(replicate(nT, c(0, sort(runif(n_i - 1, 1, tmax))))),
                    age1   = rep(runif(nT, min= 18, max= 50), each = n_i),
                    sex1   = rep(rbinom(nT, size= 1, prob= 0.5), each = n_i),
                    drug1  = rep(rbinom(nT, size= 1, prob= 0.5), each = n_i))

data2 <- data.frame(id     = rep(seq_len(nT), each = n_i),
                    time2  = c(replicate(nT, c(0, sort(runif(n_i - 1, 1, tmax))))),
                    age2   = rep(runif(nT, min= 18, max= 50), each = n_i),
                    sex2   = rep(rbinom(nT, size= 1, prob= 0.5), each = n_i),
                    drug2  = rep(rbinom(nT, size= 1, prob= 0.5), each = n_i))

data3 <- data.frame(id     = rep(seq_len(nT), each = n_i),
                    time3  = c(replicate(nT, c(0, sort(runif(n_i - 1, 1, tmax))))),
                    age3   = rep(runif(nT, min= 18, max= 50), each = n_i),
                    sex3   = rep(rbinom(nT, size= 1, prob= 0.5), each = n_i),
                    drug3  = rep(rbinom(nT, size= 1, prob= 0.5), each = n_i))

data1$age1 <- data1$age1 + data1$time1
data2$age2 <- data2$age2 + data2$time2
data3$age3 <- data3$age3 + data3$time3

ids <- sort(sample(seq_len(nT), nT-10)) # 10 patients only have survival outcome
n_na <- 150 # 150 patients (out of 500) missing per outcome (out of 3)
ids_na <- sample(ids, size= n_na*3) 
ids_na <- split(ids_na, rep(1:3, times= n_na))

unq_idL <- lapply(ids_na, setdiff, x = ids)
idL <- lapply(unq_idL, rep, each = n_i)

data1 <- data1[data1$id %in% idL[[1]], ]
data2 <- data2[data2$id %in% idL[[2]], ]
data3 <- data3[data3$id %in% idL[[3]], ]

X <- list(model.matrix(~ 1 + time1 * (drug1 + sex1) + I(time1^2) + age1, data = data1),
          model.matrix(~ 1 + splines::ns(time2, 2) + sex2,                      data = data2),
          model.matrix(~ 1 + time3 + sex3 + drug3 + time3:drug3,                data = data3))

Z <- list(model.matrix(~ 1 + time1 + I(time1^2), data = data1),
          model.matrix(~ 1 + time2,              data = data2),
          model.matrix(~ 1,                      data = data3))

nres <- sapply(Z, ncol)
nfes <- sapply(X, ncol)

componentsHC <- mapply2(create_HC_X2, x= X, z= Z, id= idL)
x_in_z <- lapply(componentsHC, "[[", "x_in_z")
baseline <- lapply(componentsHC, "[[", "baseline")
Xbase <- lapply(componentsHC, "[[", "Xbase")
Xbase[] <- mapply2(function (m, nams) {rownames(m) <- nams; m}, Xbase, unq_idL)
x_in_z_base <- mapply2(function (x, y) sort(c(x, y)), x_in_z, baseline)
ind_FE <- split(seq_len(sum(nfes)), rep(seq_along(X), nfes))
ind_FE_HC <- unlist(mapply2(function (x, ind) x[ind], ind_FE, x_in_z_base),
                    use.names = FALSE)

nfes_HC <- sapply(x_in_z_base, length) # q_dot

X_dot

X_dot <- create_X_dot(Xbase, nT, unq_idL, nres, nfes_HC, baseline, x_in_z_base, x_in_z)

X_dot2 <- create_X_dot2(nT, nres, ind_FE_HC, x_in_z, x_in_z_base, unq_idL, Xbase)

all(X_dot == X_dot2)

microbenchmark::microbenchmark(
  create_X_dot(Xbase, nT, unq_idL, nres, nfes_HC, baseline, x_in_z_base, x_in_z),
  create_X_dot2(nT, nres, ind_FE_HC, x_in_z, x_in_z_base, unq_idL, Xbase)
  )

Example 2

pbc2$prothrombin[pbc2$id == levels(pbc2$id)[1L]] <- NA
pbc2$serBilir[pbc2$id == levels(pbc2$id)[1L]] <- NA
pbc2$ascites[pbc2$id == levels(pbc2$id)[1L]] <- NA

pbc2$prothrombin[pbc2$id == levels(pbc2$id)[2L]] <- NA

fm1 <- lme(log(serBilir) ~ year * (drug + sex) + I(year^2) + age + serChol,
           data = pbc2, random = ~ year + I(year^2)| id, na.action = na.exclude)

fm2 <- lme(prothrombin ~ ns(year, 2) + sex, data = pbc2,
           random = ~ year + I(year^2)| id,
           na.action = na.exclude, control = lmeControl(opt = "optim"))

fm3 <- mixed_model(ascites ~ year + sex, data = pbc2, random = ~ year | id,
                   family = binomial())

Mixed_objects <- list(fm1, fm2, fm3)

Surv_object <- coxph(Surv(years, status2) ~ age, data = pbc2.id)

time_var <- 'year'
functional_forms <- NULL
data_Surv <- NULL
id_var <- NULL
priors <- NULL
control <- NULL

jm()

    con <- list(GK_k = 15L, Bsplines_degree = 2, base_hazard_segments = 10,
                diff = 2L, n_chains = 3L, n_burnin = 500L, n_iter = 3500L,
                n_thin = 1L, seed = 123L, MALA = FALSE,
                save_random_effects = FALSE,
                cores = max(parallel::detectCores() - 1, 1))
    #control <- c(control, list(...))
    namC <- names(con)
    con[(namc <- names(control))] <- control
    if (length(noNms <- namc[!namc %in% namC]) > 0)
        warning("unknown names in control: ", paste(noNms, collapse = ", "))
    if (con$n_burnin > con$n_iter) {
        stop("'n_burnin' cannot be larger than 'n_iter'.")
    }
    # if a single mixed model has been provided put in a list
    if (!inherits(Mixed_objects, "list")) {
        Mixed_objects <- list(Mixed_objects)
    }
    # check if only lme and MixMod have been provided
    if (!all(sapply(Mixed_objects, class) %in% c("lme", "MixMod"))) {
        stop("'Mixed_objects' should be of class 'lme' of 'MixMod'.\n")
    }
    # extract the data from each of the mixed models
    # and check whether the same data have been used;
    # otherwise an error
    datas <- lapply(Mixed_objects, "[[", "data")
    if (!all(sapply(datas[-1L], function (x) isTRUE(all.equal(x, datas[[1L]]))))) {
        stop("It seems that some of the mixed models have been fitted to different versions ",
             "of the dataset. Use the same exact dataset in the calls to lme() ",
             " and mixed_model().")
    }
    dataL <- datas[[1L]]
    rm(datas)
    # extract id variable (again we assume a single grouping variable)
    id_names <- sapply(Mixed_objects, function (object)
        names(if (inherits(object, "MixMod")) object$id[1L] else object$groups[1L]))
    if (!all(id_names == id_names[1L])) {
        stop("it seems that different grouping variables have been used in the mixed models.")
    }
    idVar <- id_names[1L]
    idL <- dataL[[idVar]]
    idL_ind <- lapply(idL, function (x) seq_along(x))
    idL_ind <- mapply2(function (x, y) split(x, y), idL_ind, idL)
    nY <- length(unique(idL))
    # order data by idL and time_var
    if (is.null(dataL[[time_var]])) {
        stop("the variable specified in agument 'time_var' cannot be found ",
             "in the database of the longitudinal models.")
    }
    dataL <- dataL[order(idL, dataL[[time_var]]), ]

    # extract terms from mixed models
    terms_FE <- lapply(Mixed_objects, extract_terms, which = "fixed", data = dataL)
    respVars <- sapply(terms_FE, function (tt) all.vars(tt)[1L])
    respVars_form <- sapply(terms_FE, function (tt) as.character(attr(tt, "variables"))[2L])
    terms_FE_noResp <- lapply(terms_FE, delete.response)
    terms_RE <- lapply(Mixed_objects, extract_terms, which = "random", data = dataL)

    # create model frames
    mf_FE_dataL <- lapply(terms_FE, model.frame.default, data = dataL)
    mf_RE_dataL <- lapply(terms_RE, model.frame.default, data = dataL)

    # we need to account for missing data in the fixed and random effects model frames,
    # in parallel across outcomes (i.e., we will allow that some subjects may have no data
    # for some outcomes)
    NAs_FE_dataL <- lapply(mf_FE_dataL, attr, "na.action")
    NAs_RE_dataL <- lapply(mf_RE_dataL, attr, "na.action")
    mf_FE_dataL <- mapply(fix_NAs_fixed, mf_FE_dataL, NAs_FE_dataL, NAs_RE_dataL,
                          SIMPLIFY = FALSE)
    mf_RE_dataL <- mapply(fix_NAs_random, mf_RE_dataL, NAs_RE_dataL, NAs_FE_dataL,
                          SIMPLIFY = FALSE)

    # create response vectors
    y <- lapply(mf_FE_dataL, model.response)
    y <- lapply(y, function (yy) {
        if (is.factor(yy)) as.numeric(yy != levels(yy)[1L]) else yy
    })
    if (any(sapply(y, function (x) any(!is.finite(x))))) {
        stop("infite value detected in some longitudinal outcomes. These are not allowed.\n")
    }

    # extract families
    families <- lapply(Mixed_objects, "[[", "family")
    families[sapply(families, is.null)] <- rep(list(gaussian()),
                                               sum(sapply(families, is.null)))
    # create the idL per outcome
    # IMPORTANT: some ids may be missing when some subjects have no data for a particular outcome
    # This needs to be taken into account when using idL for indexing. Namely, a new id variable
    # will need to be created in jm_fit()
    unq_id <- unique(idL)
    idL <- mapply(exclude_NAs, NAs_FE_dataL, NAs_RE_dataL,
                  MoreArgs = list(id = idL), SIMPLIFY = FALSE)
    idL <- lapply(idL, match, table = unq_id)
    # the index variable idL_lp is to be used to subset the random effects of each outcome
    # such that to calculate the Zb part of the model as rowSums(Z * b[idL_lp, ]). This
    # means that for outcomes that miss some subjects, we recode the id variable from 1
    # until n', where n' is the number of subjects available for the respective outcome
    idL_lp <- lapply(idL, function (x) match(x, unique(x)))
    # the unique values of idL is to be used in specifying which subjects have which outcomes
    # this is relevant in the calculation of the log density / probability mass function
    # for the longitudinal outcomes
    unq_idL <- lapply(idL, unique)

    # create design matrices for mixed models
    X <- mapply(model.matrix.default, terms_FE, mf_FE_dataL, SIMPLIFY = FALSE)
    Z <- mapply(model.matrix.default, terms_RE, mf_RE_dataL, SIMPLIFY = FALSE)
    if (length(Z) == 1 && ncol(Z[[1]]) == 1) {
        stop("jm() does not currently work when you have a single ",
             "longitudinal outcome and only random intercepts.")
    }
    nres <- sapply(Z, ncol)
    ind_RE <- split(seq_len(sum(nres)), rep(seq_along(Z), nres))
    componentsHC <- mapply2(create_HC_X2, X, Z, idL)
    Xbase <- lapply(componentsHC, "[[", "Xbase")
    Xbase[] <- mapply2(function (m, nams) {rownames(m) <- nams; m}, Xbase, unq_idL)
    baseline <- lapply(componentsHC, "[[", "baseline")
    x_in_z <- lapply(componentsHC, "[[", "x_in_z")
    x_notin_z <- lapply(componentsHC, "[[", "x_notin_z")
    nfes <- sapply(X, ncol)
    # 'ind_FE' is used in vec2field() to re-create the field of betas
    # from betas_vec
    ind_FE <- split(seq_len(sum(nfes)), rep(seq_along(X), nfes))
    x_in_z_base <- mapply2(function (x, y) sort(c(x, y)), x_in_z, baseline)
    # 'ind_FE_HC' denotes which elements of betas_vec are in the HC formulation
    # this will be use to save the results in the corresponding columns
    ind_FE_HC <- unlist(mapply2(function (x, ind) x[ind], ind_FE, x_in_z_base),
                        use.names = FALSE)
    # 'ind_FE_HC' denotes which elements of betas_vec are not in the
    # HC formulation. It is a list, to be used in conjuction with
    # has_tilde_betas. That is, if has_tilde_betas = TRUE, we need to save
    # from the Metropolis-Hastings step the betas in the columns 'ind_FE_nHC'
    ind_FE_nHC <- mapply2(function (x, ind) x[-ind], ind_FE, x_in_z_base)
    has_tilde_betas <- as.integer(sapply(ind_FE_nHC, length) > 0)
    ind_FE_nHC[] <- lapply(ind_FE_nHC, function (x) if (length(x)) x else 0L)
    ########################################################
    ########################################################
    # try to recover survival dataset
    if (is.null(data_Surv) || !is.data.frame(data_Surv)) {
        dataS <- try(eval(Surv_object$call$data, envir = parent.frame()),
                     silent = TRUE)
        if (inherits(dataS, "try-error")) {
            stop("could not recover the dataset used to fit the Cox/AFT model; ",
                 "please provide this dataset in the 'data_Surv' argument of jm().")
        }
    } else {
        dataS <- data_Surv
    }

    # if the longitudinal outcomes are not in dataS, we set a random value for
    # them. This is needed for the calculation of the matrix of interaction terms
    # between the longitudinal outcomes and other variables.
    for (i in seq_along(respVars)) {
        if (is.null(dataS[[respVars[i]]])) dataS[[respVars[i]]] <- rnorm(nrow(dataS))
    }
    # if the time_var is not in dataS set it to a random number
    if (is.null(dataS[[time_var]])) dataS[[time_var]] <- rnorm(nrow(dataS))
    # terms for survival model
    terms_Surv <- Surv_object$terms
    terms_Surv_noResp <- delete.response(terms_Surv)
    mf_surv_dataS <- model.frame.default(terms_Surv, data = dataS)

    # survival times
    Surv_Response <- model.response(mf_surv_dataS)
    type_censoring <- attr(Surv_Response, "type")
    if (is.null(dataS[[idVar]])) {
        if (is.null(id_var)) {
            stop("cannot extract the subject id variable from the dataset used to fit the ",
                 "survival model. Please include this variable in the dataset ",
                 "and/or specify the 'id_var' argument.\n")
        } else {
            idT <- dataS[[id_var]]
        }
    } else {
        idT <- dataS[[idVar]]
    }
    if (!is.null(NAs_surv <- attr(mf_surv_dataS, "na.action"))) {
        idT <- idT[-NAs_surv]
        dataS <- dataS[-NAs_surv, ]
    }
    idT <- factor(idT, levels = unique(idT))

    nT <- length(unique(idT))
    if (nY != nT) {
        stop("the number of groups/subjects in the longitudinal and survival datasets ",
             "do not seem to match. A potential reason why this may be happening is ",
             "missing data in some covariates used in the individual models.")
    }
    if (!all(idT %in% dataL[[idVar]])) {
        stop("it seems that some of the levels of the id variable in the survival dataset",
             "cannot be found in the dataset of the Mixed_objects. Please check that ",
             "the same subjects/groups are used in the datasets used to fit the mixed ",
             "and survival models. Also, the name of the subjects/groups variable ",
             "in the different datasets used to fit the individual models ",
             "needs to be the same in all of the datasets.")
    }
    # we need to check that the ordering of the subjects in the same in dataL and dataS.
    # If not, then a warning and do it internally
    if (!all(order(unique(idT)) == order(unique(dataL[[idVar]])))) {
        warning("It seems that the ordering of the subjects in the dataset used to fit the ",
                "mixed models and the dataset used for the survival model is not the same. ",
                "We set internally the datasets in the same order, but it would be best ",
                "that you do it beforehand on your own.")
        dataS <- dataS[order(idT), ]
        mf_surv_dataS <- model.frame.default(terms_Surv, data = dataS)
        Surv_Response <- model.response(mf_surv_dataS)
    }
    # Notation:
    #  - Time_right: event or right censoring time
    #  - Time_left: left censoring time
    #  - trunc_Time: truncation time
    #  - delta: 0 of right censored, 1 for event, 2 for left censored,
    #           3 for interval censored
    if (type_censoring == "right") {
        Time_right <- unname(Surv_Response[, "time"])
        Time_left <- Time_start <- trunc_Time <- rep(0.0, nrow(dataS))
        delta <-  unname(Surv_Response[, "status"])
    } else if (type_censoring == "counting") {
        Time_start <- unname(Surv_Response[, "start"])
        Time_stop <- unname(Surv_Response[, "stop"])
        delta <-  unname(Surv_Response[, "status"])
        Time_right <- tapply(Time_stop, idT, tail, n = 1) # time of event
        trunc_Time <- tapply(Time_start, idT, head, n = 1) # possible left truncation time
        Time_left <- rep(0.0, nrow(dataS))
        delta <- tapply(delta, idT, tail, n = 1) # event indicator at Time_right
    } else if (type_censoring == "interval") {
        Time1 <-  unname(Surv_Response[, "time1"])
        Time2 <-  unname(Surv_Response[, "time2"])
        trunc_Time <- Time_start <- rep(0.0, nrow(dataS))
        delta <- unname(Surv_Response[, "status"])
        Time_right <- Time1
        Time_right[delta == 3] <- Time2[delta == 3]
        Time_right[delta == 2] <- 0.0
        Time_left <- Time1
        Time_left[delta <= 1] <- 0.0
    }
    names(Time_right) <- names(Time_left) <- names(Time_start) <- idT
    which_event <- which(delta == 1)
    which_right <- which(delta == 0)
    which_left <- which(delta == 2)
    which_interval <- which(delta == 3)
    # extract strata if present otherwise all subjects in one stratum
    ind_strata <- attr(terms_Surv, "specials")$strata
    strata <- if (is.null(ind_strata)) {
        rep(1, nrow(mf_surv_dataS))
    } else {
        unclass(mf_surv_dataS[[ind_strata]])
    }
    n_strata <- length(unique(strata))

    # 'Time_integration' is the upper limit of the integral in likelihood
    # of the survival model. For subjects with event (delta = 1), for subjects with
    # right censoring and for subjects with interval censoring we need to integrate
    # up to 'Time_right'. For subjects with left censoring we need to integrate up to
    # 'Time_left'; hence we set for them 'Time_integration = Time_left'.
    # For subjects with interval censoring we need two integrals from 0 to 'Time_right'
    # and also from 0 to 'Time_left'. However, in the Gauss-Kronrod approximation it
    # can happen that the first integral has a lower value than the second one, which is
    # not correct. To overcome this issue, for interval censored data we first approximate
    # the integral from 0 to 'Time_left'. And then the integral from 0 to 'Time_right' is
    # set equal to first integral plus the integral from 'Time_left' to 'Time_right'. For
    # this second integral we introduce the variable 'Time_integration2' which is equal to
    # 'Time_right', i.e., for interval censored data 'Time_integration' is set equal to
    # 'Time_left'
    Time_integration <- Time_right
    Time_integration[which_left] <- Time_left[which_left]
    Time_integration[which_interval] <- Time_left[which_interval]
    Time_integration2 <- rep(0.0, length(Time_integration))
    if (length(which_interval)) {
        Time_integration2[which_interval] <- Time_right[which_interval]
    }

    # create Gauss Kronrod points and weights
    GK <- gaussKronrod(con$GK_k)
    sk <- GK$sk
    P <- c(Time_integration - trunc_Time) / 2
    st <- outer(P, sk) + (c(Time_integration + trunc_Time) / 2)
    log_Pwk <- rep(log(P), each = length(sk)) + rep_len(log(GK$wk), length.out = length(st))
    if (length(which_interval)) {
        # we take the absolute value because for the subjects for whom we do not have
        # interval censoring P2 will be negative and this will produce a NA when we take
        # the log in 'log_Pwk2'
        P2 <- abs(Time_integration2 - Time_integration) / 2
        st2 <- outer(P2, sk) + (c(Time_integration2 + Time_integration) / 2)
        log_Pwk2 <- rep(log(P2), each = length(sk)) +
            rep_len(log(GK$wk), length.out = length(st2))
    } else {
        P2 <- st2 <- log_Pwk2 <- rep(0.0, nT * con$GK_k)
    }

    # knots for the log baseline hazard function
    if (is.null(con$knots)) {
        qs <- quantile(c(Time_right, Time_left), probs = c(0.1, 0.9))
        con$knots <- knots(qs[1L], qs[2L], con$base_hazard_segments,
                           con$Bsplines_degree)
    }
    .knots_base_hazard <- con$knots
    env <- new.env(parent = .GlobalEnv)
    assign(".knots_base_hazard", con$knots, envir = env)

    # Extract functional forms per longitudinal outcome
    if (any(!names(functional_forms) %in% respVars_form)) {
        stop("unknown names in the list provided in the 'functional_forms' argument; as names ",
             "of the elements of this list you need to use the response variables from ",
             "the multivariate mixed model.\n")
    }
    # for outcomes not specified in Formulas use the value parameterization
    not_specified <- !respVars_form %in% names(functional_forms)
    if (any(not_specified)) {
        functional_forms_ns <- lapply(respVars_form[not_specified],
                                      function (v) as.formula(paste0("~ value(", v, ")")))
        names(functional_forms_ns) <- respVars_form[not_specified]
        functional_forms <- c(functional_forms, functional_forms_ns)
    }
    functional_forms <- functional_forms[order(match(names(functional_forms),
                                                     respVars_form))]
    ###################################################################
    # List of lists
    # One list component per association structure per outcome
    # List components vectors of integers corresponding to the term
    # each association structure corresponds to
    set_env <- function (form, env) {environment(form) <- env; form}
    functional_forms[] <- lapply(functional_forms, set_env, env = env)
    FunForms_per_outcome <- lapply(functional_forms, extract_functional_forms,
                                   data = dataS)
    FunForms_per_outcome <- lapply(FunForms_per_outcome,
                                   function (x) x[sapply(x, length) > 0])
    collapsed_functional_forms <- lapply(FunForms_per_outcome, names)

    #####################################################

    # design matrices for the survival submodel:
    #  - W0 is the design matrix for the log baseline hazard
    #  - W is the design matrix for the covariates in the Surv_object
    #    (including exogenous time-varying covariates)
    #  - X is the design matrix for the fixed effects, per outcome and functional form
    #  - Z is the design matrix for the random effects, per outcome and functional form
    #  - U is the design matrix for possible interaction terms in functional forms
    #  - Wlong is the design matrix for the longitudinal outcomes in the survival submodel
    #    that is already multiplied with the interaction terms matrix U
    # in the above design matrices we put the "_h" to denote calculation at the event time
    # 'Time_right', we put "_H" to denote calculation at the 'Time_integration', and
    # "_H2" to denote calculation at the 'Time_integration2'.
    strata_H <- rep(strata, each = con$GK_k)
    W0_H <- create_W0(c(t(st)), con$knots, con$Bsplines_degree + 1, strata_H)
    dataS_H <- SurvData_HazardModel(st, dataS, Time_start,
                                    paste0(idT, "_", strata), time_var)
    mf <- model.frame.default(terms_Surv_noResp, data = dataS_H)
    W_H <- construct_Wmat(terms_Surv_noResp, mf)
    any_gammas <- as.logical(ncol(W_H))
    if (!any_gammas) {
        W_H <- matrix(0.0, nrow = nrow(W_H), ncol = 1L)
    }
    X_H <- desing_matrices_functional_forms(st, terms_FE_noResp,
                                            dataL, time_var, idVar,
                                            collapsed_functional_forms)
    Z_H <- desing_matrices_functional_forms(st, terms_RE,
                                            dataL, time_var, idVar,
                                            collapsed_functional_forms)
    U_H <- lapply(functional_forms, construct_Umat, dataS = dataS_H)
    if (length(which_event)) {
        W0_h <- create_W0(Time_right, con$knots, con$Bsplines_degree + 1, strata)
        dataS_h <- SurvData_HazardModel(Time_right, dataS, Time_start,
                                        paste0(idT, "_", strata), time_var)
        mf <- model.frame.default(terms_Surv_noResp, data = dataS_h)
        W_h <- construct_Wmat(terms_Surv_noResp, mf)
        if (!any_gammas) {
            W_h <- matrix(0.0, nrow = nrow(W_h), ncol = 1L)
        }
        X_h <- desing_matrices_functional_forms(Time_right, terms_FE_noResp,
                                                dataL, time_var, idVar,
                                                collapsed_functional_forms)
        Z_h <- desing_matrices_functional_forms(Time_right, terms_RE,
                                                dataL, time_var, idVar,
                                                collapsed_functional_forms)
        U_h <- lapply(functional_forms, construct_Umat, dataS = dataS_h)
    } else {
        W0_h <- W_h <- matrix(0.0)
        X_h <- Z_h <- U_h <- rep(list(matrix(0.0)), length(respVars))
    }
    if (length(which_interval)) {
        W0_H2 <- create_W0(c(t(st2)), con$knots, con$Bsplines_degree + 1,
                           strata_H)
        dataS_H2 <- SurvData_HazardModel(st2, dataS, Time_start,
                                         paste0(idT, "_", strata), time_var)
        mf2 <- model.frame.default(terms_Surv_noResp, data = dataS_H2)
        W_h <- construct_Wmat(terms_Surv_noResp, mf2)
        if (!any_gammas) {
            W_H2 <- matrix(0.0, nrow = nrow(W_H2), ncol = 1L)
        }
        X_H2 <- desing_matrices_functional_forms(st, terms_FE_noResp,
                                                 dataL, time_var, idVar,
                                                 collapsed_functional_forms)
        Z_H2 <- desing_matrices_functional_forms(st, terms_RE,
                                                 dataL, time_var, idVar,
                                                 collapsed_functional_forms)
        U_H <- lapply(functional_forms, construct_Umat, dataS = dataS_H2)
    } else {
        W0_H2 <- W_H2 <- matrix(0.0)
        X_H2 <- Z_H2 <- U_H2 <- rep(list(matrix(0.0)), length(respVars))
    }
    nfes_HC <- sapply(x_in_z_base, length)
    out_in <- sapply(idL, "%in%", x = seq_len(nT))
    all_pat <- apply(out_in, 1L, paste0, collapse = "/")
    id_patt <- match(all_pat, unique(all_pat))
    find_patt <- function (patt, n) which(rep(patt, times = n))
    ind_RE_patt <- apply(unique(out_in), 1L, find_patt, n = nres)
    ind_FE_patt <- apply(unique(out_in), 1L, find_patt, n = nfes_HC)
X_dot <- create_X_dot(Xbase, nT, unq_idL, nres, nfes_HC, baseline, x_in_z_base, x_in_z)

X_dot2 <- create_X_dot2(nT, nres, ind_FE_HC, x_in_z, x_in_z_base, unq_idL, Xbase)

all(X_dot == X_dot2)

microbenchmark::microbenchmark(
  create_X_dot(Xbase, nT, unq_idL, nres, nfes_HC, baseline, x_in_z_base, x_in_z),
  create_X_dot2(nT, nres, ind_FE_HC, x_in_z, x_in_z_base, unq_idL, Xbase)
)

set.seed(2020)
id <- sample(seq_len(nT), 1)
rows <- lapply(idL, match, x=id) 
mapply(function(XX, r){XX[r,]}, X, rows)

X_dot[seq_len(sum(nres)) + (id-1)*sum(nres),]


drizopoulos/JMbayes2 documentation built on July 15, 2024, 11:13 p.m.